package com.primi.chen.spiders.proxypool;

import com.cv4j.proxy.ProxyListPageParser;
import com.cv4j.proxy.domain.Proxy;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * {@link ProxyListPageParser} for pages that list proxies as bare {@code ip:port} entries placed
 * directly under the {@code <body>} element.
 *
 * <p>Each direct child node of the body is rendered to a string and trimmed; nodes that look like
 * an {@code ip:port} entry are converted into an anonymous {@code http} {@link Proxy}. Nodes that
 * cannot be interpreted (no port part, non-numeric port, port out of range, empty host) are
 * skipped so that a single malformed entry does not discard the rest of the page.
 *
 * @author Primi.Chen(Chenxiangxu)
 */
@Slf4j
public class ProxyListPlusPageParser implements ProxyListPageParser {

    /**
     * Pre-filter for candidate entries: the text must start with a digit, {@code '.'} or
     * {@code ':'}. Because {@code '.'} does not match line terminators, a node spanning several
     * lines is rejected as a whole. Compiled once instead of on every loop iteration.
     */
    private static final Pattern CANDIDATE_ENTRY = Pattern.compile("[0-9\\.\\:].*");

    /** Value returned by {@link #parsePort(String)} when the text is not a usable port. */
    private static final int INVALID_PORT = -1;

    private static final int MIN_PORT = 1;
    private static final int MAX_PORT = 65535;

    /**
     * Extracts proxies from the given page.
     *
     * @param content HTML (or plain text) of the proxy list page; {@code null} is treated as an
     *                empty page
     * @return the proxies found, in document order; never {@code null}, empty when nothing usable
     *         was found
     */
    @Override
    public List<Proxy> parse(String content) {
        List<Proxy> proxies = new ArrayList<>();
        if (content == null) {
            return proxies;
        }

        Document document = Jsoup.parse(content);
        Element body = document.body();
        if (body == null) {
            return proxies;
        }

        for (Node childNode : body.childNodes()) {
            String entry = childNode.toString().trim();
            if (!CANDIDATE_ENTRY.matcher(entry).matches()) {
                continue;
            }

            // The pre-filter only inspects the first character, so the entry may still lack a
            // ':' separator or carry a non-numeric port; validate before indexing/parsing.
            String[] ipPort = entry.split(":");
            if (ipPort.length < 2) {
                continue;
            }
            String ip = ipPort[0].trim();
            int port = parsePort(ipPort[1]);
            if (ip.isEmpty() || port == INVALID_PORT) {
                continue;
            }

            Proxy proxy = new Proxy();
            proxy.setId(entry);
            proxy.setIp(ip);
            proxy.setPort(port);
            proxy.setType("http");
            proxy.setAnonymousFlag(true);
            proxies.add(proxy);
        }
        return proxies;
    }

    /**
     * Parses a port number, tolerating surrounding whitespace.
     *
     * @param text the text following the first {@code ':'} of an entry
     * @return the port when it is an integer in {@code [1, 65535]}, otherwise
     *         {@link #INVALID_PORT}
     */
    private static int parsePort(String text) {
        try {
            int port = Integer.parseInt(text.trim());
            return (port >= MIN_PORT && port <= MAX_PORT) ? port : INVALID_PORT;
        } catch (NumberFormatException ignored) {
            // Malformed entry on a scraped page: skip it rather than abort the whole parse.
            return INVALID_PORT;
        }
    }
}
